Rosa Huamaní Pucho (20200422)
import pandas as pd
# Remote location of the pickled dengue DataFrame.
linkData="https://github.com/SocialAnalytics-StrategicIntelligence/TableOperations/raw/main/dengue_ok.pkl"
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files from a source you trust.
dengue = pd.read_pickle(linkData)
# checking format
dengue.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 501236 entries, 0 to 501235 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 departamento 501236 non-null object 1 provincia 501236 non-null object 2 distrito 501236 non-null object 3 ano 501236 non-null int64 4 semana 501236 non-null int64 5 sexo 501236 non-null object 6 edad 501236 non-null int64 7 enfermedad 501236 non-null category 8 case 501236 non-null int64 dtypes: category(1), int64(4), object(4) memory usage: 31.1+ MB
# Each row is a person:
# (preview of the first rows of the full table)
dengue.head()
| departamento | provincia | distrito | ano | semana | sexo | edad | enfermedad | case | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | HUANUCO | LEONCIO PRADO | LUYANDO | 2000 | 47 | M | 9 | SIN_SEÑALES | 1 |
| 1 | HUANUCO | LEONCIO PRADO | LUYANDO | 2000 | 40 | F | 18 | SIN_SEÑALES | 1 |
| 2 | HUANUCO | LEONCIO PRADO | JOSE CRESPO Y CASTILLO | 2000 | 48 | F | 32 | SIN_SEÑALES | 1 |
| 3 | HUANUCO | LEONCIO PRADO | JOSE CRESPO Y CASTILLO | 2000 | 37 | F | 40 | SIN_SEÑALES | 1 |
| 4 | HUANUCO | LEONCIO PRADO | MARIANO DAMASO BERAUN | 2000 | 42 | M | 16 | SIN_SEÑALES | 1 |
# Number of rows per value of `enfermedad` (computed before any year filter).
dengue["enfermedad"].value_counts()
| count | |
|---|---|
| enfermedad | |
| SIN_SEÑALES | 443996 |
| ALARMA | 54981 |
| GRAVE | 2259 |
# Text version of `enfermedad`, with a numeric prefix added to each label
# (1_, 2_, 3_) so the labels have a fixed sort order.
dengue['enfermedad_text']=dengue.enfermedad.astype(str)
dengue.replace({'enfermedad_text':{'SIN_SEÑALES':'1_SIN_SEÑALES','ALARMA':'2_ALARMA','GRAVE':'3_GRAVE'}},inplace=True)
# Boolean mask selecting the rows whose year is 2020.
dengue_2020 =dengue["ano"] == 2020
# From here on the DataFrame holds only 2020 data. The .copy() makes the
# subset an independent DataFrame, so that adding columns to it later does
# not raise SettingWithCopyWarning (assignment on a slice of the original).
dengue = dengue[dengue_2020].copy()
# Number of rows per value of `semana`, in order of appearance (unsorted).
dengue.semana.value_counts(sort=False)
| count | |
|---|---|
| semana | |
| 1 | 570 |
| 5 | 1035 |
| 7 | 1095 |
| 2 | 702 |
| 9 | 1130 |
| 3 | 833 |
| 4 | 944 |
| 33 | 450 |
| 6 | 1062 |
| 11 | 1190 |
| 8 | 1060 |
| 45 | 1614 |
| 34 | 489 |
| 35 | 422 |
| 10 | 1478 |
| 50 | 1476 |
| 46 | 1616 |
| 12 | 1001 |
| 44 | 1415 |
| 43 | 1230 |
| 13 | 955 |
| 48 | 1455 |
| 14 | 1052 |
| 15 | 1028 |
| 16 | 1442 |
| 19 | 958 |
| 17 | 1271 |
| 18 | 1119 |
| 21 | 828 |
| 42 | 1093 |
| 20 | 926 |
| 31 | 323 |
| 22 | 543 |
| 23 | 413 |
| 49 | 1485 |
| 24 | 386 |
| 25 | 358 |
| 40 | 896 |
| 41 | 962 |
| 47 | 1665 |
| 26 | 280 |
| 51 | 1351 |
| 27 | 272 |
| 28 | 208 |
| 29 | 203 |
| 37 | 626 |
| 30 | 217 |
| 32 | 355 |
| 36 | 582 |
| 38 | 739 |
| 39 | 820 |
| 53 | 1183 |
| 52 | 1126 |
# Age brackets are right-closed: [0, 15], (15, 50], (50, 110].
# include_lowest=True closes the first bracket on the left so age 0 is kept.
binLimits = [0, 15, 50, 110]
theLabels = ["a_menor_a_16", "b_entre_16y50", "c_mayor_a_50"]
dengue["edad_grupos"] = pd.cut(
    x=dengue['edad'],
    bins=binLimits,
    labels=theLabels,
    include_lowest=True,
    ordered=True,
)
# see
dengue.head()
<ipython-input-7-f8139b7adf58>:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dengue["edad_grupos"]=pd.cut(dengue['edad'], include_lowest=True,
| departamento | provincia | distrito | ano | semana | sexo | edad | enfermedad | case | enfermedad_text | edad_grupos | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 345299 | SAN MARTIN | LAMAS | TABALOSOS | 2020 | 1 | F | 19 | SIN_SEÑALES | 1 | 1_SIN_SEÑALES | b_entre_16y50 |
| 345300 | SAN MARTIN | SAN MARTIN | LA BANDA DE SHILCAYO | 2020 | 1 | F | 19 | SIN_SEÑALES | 1 | 1_SIN_SEÑALES | b_entre_16y50 |
| 345301 | SAN MARTIN | SAN MARTIN | TARAPOTO | 2020 | 1 | M | 32 | SIN_SEÑALES | 1 | 1_SIN_SEÑALES | b_entre_16y50 |
| 345302 | MADRE DE DIOS | TAMBOPATA | TAMBOPATA | 2020 | 1 | M | 49 | SIN_SEÑALES | 1 | 1_SIN_SEÑALES | b_entre_16y50 |
| 345303 | SAN MARTIN | SAN MARTIN | LA BANDA DE SHILCAYO | 2020 | 1 | F | 21 | ALARMA | 1 | 2_ALARMA | b_entre_16y50 |
#!pip install --upgrade altair
Requirement already satisfied: altair in /usr/local/lib/python3.10/dist-packages (5.4.1) Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from altair) (3.1.4) Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair) (4.23.0) Requirement already satisfied: narwhals>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from altair) (1.6.0) Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from altair) (24.1) Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.10/dist-packages (from altair) (4.12.2) Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (24.2.0) Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (2023.12.1) Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (0.35.1) Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (0.20.0) Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->altair) (2.1.5)
#!pip install vegafusion vegafusion-python-embed
Requirement already satisfied: vegafusion in /usr/local/lib/python3.10/dist-packages (1.6.9) Requirement already satisfied: vegafusion-python-embed in /usr/local/lib/python3.10/dist-packages (1.6.9) Requirement already satisfied: altair>=5.2.0 in /usr/local/lib/python3.10/dist-packages (from vegafusion) (5.4.1) Requirement already satisfied: pyarrow>=5 in /usr/local/lib/python3.10/dist-packages (from vegafusion) (14.0.2) Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from vegafusion) (2.1.4) Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from vegafusion) (5.9.5) Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from vegafusion) (3.20.3) Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion) (3.1.4) Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion) (4.23.0) Requirement already satisfied: narwhals>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion) (1.6.0) Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion) (24.1) Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion) (4.12.2) Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.10/dist-packages (from pyarrow>=5->vegafusion) (1.26.4) Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion) (2.8.2) Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion) (2024.1) Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion) (2024.1) Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from 
jsonschema>=3.0->altair>=5.2.0->vegafusion) (24.2.0) Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion) (2023.12.1) Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion) (0.35.1) Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion) (0.20.0) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->vegafusion) (1.16.0) Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->altair>=5.2.0->vegafusion) (2.1.5)
import pandas as pd
import altair as alt
# Route chart data through the "vegafusion" data transformer.
# NOTE(review): presumably enabled because the table is too large for
# Altair's default data handling — confirm.
alt.data_transformers.enable("vegafusion")
DataTransformerRegistry.enable('vegafusion')
#!pip install "vl-convert-python>=1.6.0"
# Base chart over the 2020 table.
intdengue = alt.Chart(dengue)
# Mean age per week, one colour per severity label.
enc_dengue = intdengue.encode(
    x=alt.X('semana:Q'),  # Q marks quantitative data
    y=alt.Y('mean(edad):Q'),
    color=alt.Color('enfermedad_text:N'),  # N marks nominal (categorical) data
)
# Layer the line with an error band built from the same encoding.
enc_dengue.mark_line() + enc_dengue.mark_errorband()
# Weekly sum of `case` per severity label, with tooltips and pan/zoom.
enc_dengue = intdengue.encode(
    x=alt.X('semana:Q'),
    y=alt.Y('sum(case):Q'),
    color=alt.Color('enfermedad_text:N'),
    tooltip=['sum(case):Q', 'semana:Q'],
).interactive()
# One panel per sex (rows) and age group (columns).
enc_dengue.mark_line().facet(
    row=alt.Row('sexo:N'),
    column=alt.Column('edad_grupos:N'),
)
# Base chart over the 2020 table.
intdengue = alt.Chart(dengue)
# Weekly sum of `case` on a log-scaled axis, one column panel per severity label.
enc_dengue = intdengue.encode(
    x=alt.X('semana:Q'),
    y=alt.Y('sum(case):Q').scale(type='log'),
    column=alt.Column('enfermedad_text:N'),
)
enc_dengue.mark_bar()
# Age statistics (median, mean, min, max) for every combination of
# age group, week, sex and severity label that actually occurs in the data.
indexList = ["edad_grupos", "semana", "sexo", "enfermedad_text"]
aggregator = {'edad': ['median', "mean", "min", "max"]}
LevelByWeek_statsFull = (
    dengue
    .groupby(by=indexList, observed=True)
    .agg(aggregator)
)
LevelByWeek_statsFull
| edad | |||||||
|---|---|---|---|---|---|---|---|
| median | mean | min | max | ||||
| edad_grupos | semana | sexo | enfermedad_text | ||||
| a_menor_a_16 | 1 | F | 1_SIN_SEÑALES | 10.0 | 9.064935 | 1 | 15 |
| 2_ALARMA | 9.5 | 9.428571 | 2 | 15 | |||
| 3_GRAVE | 3.0 | 3.000000 | 2 | 4 | |||
| M | 1_SIN_SEÑALES | 10.0 | 9.736842 | 1 | 15 | ||
| 2_ALARMA | 11.5 | 10.444444 | 4 | 15 | |||
| ... | ... | ... | ... | ... | ... | ... | ... |
| c_mayor_a_50 | 52 | M | 2_ALARMA | 64.0 | 63.909091 | 53 | 82 |
| 53 | F | 1_SIN_SEÑALES | 58.0 | 61.458333 | 51 | 92 | |
| 2_ALARMA | 63.0 | 65.000000 | 52 | 87 | |||
| M | 1_SIN_SEÑALES | 58.0 | 62.898305 | 51 | 90 | ||
| 2_ALARMA | 60.0 | 61.000000 | 51 | 78 | |||
752 rows × 4 columns
# Long format: move the statistic names (median/mean/min/max) from the
# columns into an extra innermost index level.
LevelByWeek_statsFull.stack(future_stack=True)
| edad | |||||
|---|---|---|---|---|---|
| edad_grupos | semana | sexo | enfermedad_text | ||
| a_menor_a_16 | 1 | F | 1_SIN_SEÑALES | median | 10.000000 |
| mean | 9.064935 | ||||
| min | 1.000000 | ||||
| max | 15.000000 | ||||
| 2_ALARMA | median | 9.500000 | |||
| ... | ... | ... | ... | ... | ... |
| c_mayor_a_50 | 53 | M | 1_SIN_SEÑALES | max | 90.000000 |
| 2_ALARMA | median | 60.000000 | |||
| mean | 61.000000 | ||||
| min | 51.000000 | ||||
| max | 78.000000 |
3008 rows × 1 columns
# Total of `case` per week, department, province and severity label.
indexList = ['semana', 'departamento', 'provincia', 'enfermedad_text']
aggregator = {'case': ['sum']}
ByWeekPlace = (
    dengue
    .groupby(by=indexList, observed=True)
    .agg(aggregator)
)
ByWeekPlace
| case | ||||
|---|---|---|---|---|
| sum | ||||
| semana | departamento | provincia | enfermedad_text | |
| 1 | AMAZONAS | BAGUA | 2_ALARMA | 1 |
| CUSCO | LA CONVENCION | 1_SIN_SEÑALES | 6 | |
| QUISPICANCHI | 1_SIN_SEÑALES | 6 | ||
| JUNIN | CHANCHAMAYO | 1_SIN_SEÑALES | 11 | |
| 2_ALARMA | 3 | |||
| ... | ... | ... | ... | ... |
| 53 | UCAYALI | ATALAYA | 1_SIN_SEÑALES | 3 |
| CORONEL PORTILLO | 1_SIN_SEÑALES | 243 | ||
| 2_ALARMA | 27 | |||
| PADRE ABAD | 1_SIN_SEÑALES | 14 | ||
| 2_ALARMA | 1 |
2842 rows × 1 columns
# Wide format: pivot the severity label (innermost index level) into columns.
# Week/province combinations lacking a given label come out as NaN, so they
# are filled with 0 to leave no missing values.
ByWeekPlace_wide = ByWeekPlace.unstack(level='enfermedad_text').fillna(value=0)
ByWeekPlace_wide
| case | |||||
|---|---|---|---|---|---|
| sum | |||||
| enfermedad_text | 1_SIN_SEÑALES | 2_ALARMA | 3_GRAVE | ||
| semana | departamento | provincia | |||
| 1 | AMAZONAS | BAGUA | 0.0 | 1.0 | 0.0 |
| CUSCO | LA CONVENCION | 6.0 | 0.0 | 0.0 | |
| QUISPICANCHI | 6.0 | 0.0 | 0.0 | ||
| JUNIN | CHANCHAMAYO | 11.0 | 3.0 | 0.0 | |
| SATIPO | 9.0 | 1.0 | 0.0 | ||
| ... | ... | ... | ... | ... | ... |
| 53 | TUMBES | TUMBES | 20.0 | 0.0 | 0.0 |
| ZARUMILLA | 1.0 | 0.0 | 0.0 | ||
| UCAYALI | ATALAYA | 3.0 | 0.0 | 0.0 | |
| CORONEL PORTILLO | 243.0 | 27.0 | 0.0 | ||
| PADRE ABAD | 14.0 | 1.0 | 0.0 | ||
1872 rows × 3 columns
# Row-wise total: all severity labels added up per week/department/province.
sumCases = ByWeekPlace_wide.sum(axis='columns')
sumCases
| 0 | |||
|---|---|---|---|
| semana | departamento | provincia | |
| 1 | AMAZONAS | BAGUA | 1.0 |
| CUSCO | LA CONVENCION | 6.0 | |
| QUISPICANCHI | 6.0 | ||
| JUNIN | CHANCHAMAYO | 14.0 | |
| SATIPO | 10.0 | ||
| ... | ... | ... | ... |
| 53 | TUMBES | TUMBES | 20.0 |
| ZARUMILLA | 1.0 | ||
| UCAYALI | ATALAYA | 3.0 | |
| CORONEL PORTILLO | 270.0 | ||
| PADRE ABAD | 15.0 |
1872 rows × 1 columns
# here you are:
# share of the 2_ALARMA column in each row's total, as a Series named 'shareAlarma'
shareAlarma = (
    ByWeekPlace_wide[('case', 'sum', '2_ALARMA')]
    .div(sumCases)
    .rename('shareAlarma')
)
shareAlarma
| shareAlarma | |||
|---|---|---|---|
| semana | departamento | provincia | |
| 1 | AMAZONAS | BAGUA | 1.000000 |
| CUSCO | LA CONVENCION | 0.000000 | |
| QUISPICANCHI | 0.000000 | ||
| JUNIN | CHANCHAMAYO | 0.214286 | |
| SATIPO | 0.100000 | ||
| ... | ... | ... | ... |
| 53 | TUMBES | TUMBES | 0.000000 |
| ZARUMILLA | 0.000000 | ||
| UCAYALI | ATALAYA | 0.000000 | |
| CORONEL PORTILLO | 0.100000 | ||
| PADRE ABAD | 0.066667 |
1872 rows × 1 columns
# Turn the index levels (semana, departamento, provincia) into regular
# columns, giving a flat DataFrame.
shareAlarma=shareAlarma.reset_index()
shareAlarma
| semana | departamento | provincia | shareAlarma | |
|---|---|---|---|---|
| 0 | 1 | AMAZONAS | BAGUA | 1.000000 |
| 1 | 1 | CUSCO | LA CONVENCION | 0.000000 |
| 2 | 1 | CUSCO | QUISPICANCHI | 0.000000 |
| 3 | 1 | JUNIN | CHANCHAMAYO | 0.214286 |
| 4 | 1 | JUNIN | SATIPO | 0.100000 |
| ... | ... | ... | ... | ... |
| 1867 | 53 | TUMBES | TUMBES | 0.000000 |
| 1868 | 53 | TUMBES | ZARUMILLA | 0.000000 |
| 1869 | 53 | UCAYALI | ATALAYA | 0.000000 |
| 1870 | 53 | UCAYALI | CORONEL PORTILLO | 0.100000 |
| 1871 | 53 | UCAYALI | PADRE ABAD | 0.066667 |
1872 rows × 4 columns
Encontremos la peor provincia por Región en las semanas del 2020
# Row label of the highest shareAlarma within each (week, department) group;
# when several rows tie, idxmax returns the first one.
where = (
    shareAlarma
    .groupby(['semana', 'departamento'])['shareAlarma']
    .idxmax()
)
# Pull those rows out and renumber them from 0.
worst_prov_Week = shareAlarma.loc[where].reset_index(drop=True)
worst_prov_Week
| semana | departamento | provincia | shareAlarma | |
|---|---|---|---|---|
| 0 | 1 | AMAZONAS | BAGUA | 1.000000 |
| 1 | 1 | CUSCO | LA CONVENCION | 0.000000 |
| 2 | 1 | JUNIN | CHANCHAMAYO | 0.214286 |
| 3 | 1 | LORETO | MARISCAL RAMON CASTILLA | 0.800000 |
| 4 | 1 | MADRE DE DIOS | TAMBOPATA | 0.294686 |
| ... | ... | ... | ... | ... |
| 777 | 53 | MADRE DE DIOS | TAHUAMANU | 0.400000 |
| 778 | 53 | PASCO | OXAPAMPA | 0.000000 |
| 779 | 53 | SAN MARTIN | EL DORADO | 0.500000 |
| 780 | 53 | TUMBES | CONTRALMIRANTE VILLAR | 0.000000 |
| 781 | 53 | UCAYALI | CORONEL PORTILLO | 0.100000 |
782 rows × 4 columns
# Number of distinct provinces that appear in worst_prov_Week.
worst_prov_Week.provincia.nunique()
64
# amount of worst provinces per region - cleaner (only rows with a positive share)
worst_prov_Week.loc[worst_prov_Week.shareAlarma > 0, 'provincia'].nunique()
58
Aplicamos algunos filtros:
# Keep department/province of the rows with a positive ALARMA share,
# renumbered from 0.
worst_ProvWeek_alarma = (
    worst_prov_Week
    .loc[worst_prov_Week.shareAlarma > 0, ['departamento', 'provincia']]
    .reset_index(drop=True)
)
worst_ProvWeek_alarma
| departamento | provincia | |
|---|---|---|
| 0 | AMAZONAS | BAGUA |
| 1 | JUNIN | CHANCHAMAYO |
| 2 | LORETO | MARISCAL RAMON CASTILLA |
| 3 | MADRE DE DIOS | TAMBOPATA |
| 4 | SAN MARTIN | HUALLAGA |
| ... | ... | ... |
| 464 | JUNIN | SATIPO |
| 465 | LORETO | MARISCAL RAMON CASTILLA |
| 466 | MADRE DE DIOS | TAHUAMANU |
| 467 | SAN MARTIN | EL DORADO |
| 468 | UCAYALI | CORONEL PORTILLO |
469 rows × 2 columns
# How many times each (department, province) pair appears in
# worst_ProvWeek_alarma.
indexList = ['departamento', 'provincia']
aggregator = {'provincia': ['count']}
worst_ProvWeek_alarma_Frequency = (
    worst_ProvWeek_alarma
    .groupby(by=indexList, observed=True)
    .agg(aggregator)
)
worst_ProvWeek_alarma_Frequency
| provincia | ||
|---|---|---|
| count | ||
| departamento | provincia | |
| AMAZONAS | BAGUA | 10 |
| CONDORCANQUI | 12 | |
| UTCUBAMBA | 10 | |
| AYACUCHO | CANGALLO | 1 |
| HUANTA | 7 | |
| LA MAR | 7 | |
| PARINACOCHAS | 1 | |
| CAJAMARCA | JAEN | 22 |
| CALLAO | CALLAO | 1 |
| CUSCO | LA CONVENCION | 28 |
| HUANUCO | HUAMALIES | 1 |
| HUANUCO | 1 | |
| LEONCIO PRADO | 11 | |
| PUERTO INCA | 10 | |
| ICA | CHINCHA | 1 |
| ICA | 18 | |
| NAZCA | 2 | |
| PISCO | 6 | |
| JUNIN | CHANCHAMAYO | 19 |
| HUANCAYO | 1 | |
| SATIPO | 31 | |
| LA LIBERTAD | ASCOPE | 4 |
| PACASMAYO | 1 | |
| TRUJILLO | 2 | |
| LAMBAYEQUE | CHICLAYO | 2 |
| LAMBAYEQUE | 1 | |
| LIMA | LIMA | 14 |
| LORETO | ALTO AMAZONAS | 7 |
| DATEM DEL MARAÑON | 1 | |
| LORETO | 8 | |
| MARISCAL RAMON CASTILLA | 9 | |
| MAYNAS | 18 | |
| PUTUMAYO | 3 | |
| REQUENA | 3 | |
| UCAYALI | 1 | |
| MADRE DE DIOS | MANU | 6 |
| TAHUAMANU | 5 | |
| TAMBOPATA | 31 | |
| PASCO | OXAPAMPA | 11 |
| PIURA | PAITA | 1 |
| PIURA | 9 | |
| SULLANA | 3 | |
| PUNO | CARABAYA | 1 |
| SAN MARTIN | BELLAVISTA | 1 |
| EL DORADO | 3 | |
| HUALLAGA | 11 | |
| LAMAS | 2 | |
| MARISCAL CACERES | 6 | |
| MOYOBAMBA | 3 | |
| PICOTA | 1 | |
| RIOJA | 2 | |
| SAN MARTIN | 14 | |
| TOCACHE | 4 | |
| TUMBES | TUMBES | 22 |
| ZARUMILLA | 6 | |
| UCAYALI | ATALAYA | 11 |
| CORONEL PORTILLO | 24 | |
| PADRE ABAD | 18 |
# final clean-up: flatten the column name, keep provinces counted more than
# twice, and move the index levels back into regular columns
worst_ProvWeek_alarma_Frequency.columns = ['WeekAffected']
worst_ProvWeek_alarma_Frequency = (
    worst_ProvWeek_alarma_Frequency
    .loc[lambda freq: freq.WeekAffected > 2]
    .reset_index()
)
worst_ProvWeek_alarma_Frequency
| departamento | provincia | WeekAffected | |
|---|---|---|---|
| 0 | AMAZONAS | BAGUA | 10 |
| 1 | AMAZONAS | CONDORCANQUI | 12 |
| 2 | AMAZONAS | UTCUBAMBA | 10 |
| 3 | AYACUCHO | HUANTA | 7 |
| 4 | AYACUCHO | LA MAR | 7 |
| 5 | CAJAMARCA | JAEN | 22 |
| 6 | CUSCO | LA CONVENCION | 28 |
| 7 | HUANUCO | LEONCIO PRADO | 11 |
| 8 | HUANUCO | PUERTO INCA | 10 |
| 9 | ICA | ICA | 18 |
| 10 | ICA | PISCO | 6 |
| 11 | JUNIN | CHANCHAMAYO | 19 |
| 12 | JUNIN | SATIPO | 31 |
| 13 | LA LIBERTAD | ASCOPE | 4 |
| 14 | LIMA | LIMA | 14 |
| 15 | LORETO | ALTO AMAZONAS | 7 |
| 16 | LORETO | LORETO | 8 |
| 17 | LORETO | MARISCAL RAMON CASTILLA | 9 |
| 18 | LORETO | MAYNAS | 18 |
| 19 | LORETO | PUTUMAYO | 3 |
| 20 | LORETO | REQUENA | 3 |
| 21 | MADRE DE DIOS | MANU | 6 |
| 22 | MADRE DE DIOS | TAHUAMANU | 5 |
| 23 | MADRE DE DIOS | TAMBOPATA | 31 |
| 24 | PASCO | OXAPAMPA | 11 |
| 25 | PIURA | PIURA | 9 |
| 26 | PIURA | SULLANA | 3 |
| 27 | SAN MARTIN | EL DORADO | 3 |
| 28 | SAN MARTIN | HUALLAGA | 11 |
| 29 | SAN MARTIN | MARISCAL CACERES | 6 |
| 30 | SAN MARTIN | MOYOBAMBA | 3 |
| 31 | SAN MARTIN | SAN MARTIN | 14 |
| 32 | SAN MARTIN | TOCACHE | 4 |
| 33 | TUMBES | TUMBES | 22 |
| 34 | TUMBES | ZARUMILLA | 6 |
| 35 | UCAYALI | ATALAYA | 11 |
| 36 | UCAYALI | CORONEL PORTILLO | 24 |
| 37 | UCAYALI | PADRE ABAD | 18 |
# Text grid: department on y, province on x; each cell prints WeekAffected,
# with the text size also driven by WeekAffected.
alt_worstProv = alt.Chart(worst_ProvWeek_alarma_Frequency)
enc_worstProv = alt_worstProv.encode(
    x=alt.X('provincia'),
    y=alt.Y('departamento'),
    text=alt.Text('WeekAffected:O'),
    size=alt.Size('WeekAffected:O'),
)
enc_worstProv.mark_text()
# Total of `case` per week, province and severity label.
indexList = ['semana', 'provincia', 'enfermedad_text']
aggregator = {'case': ['sum']}
ByWeekProv = dengue.groupby(by=indexList, observed=True).agg(aggregator)
# Severity labels as columns; absent combinations count as 0.
ByWeekProv_wide = ByWeekProv.unstack().fillna(0)
# Share of 2_ALARMA in each week/province total, as a flat table.
ByWeekProvAlarm = (
    ByWeekProv_wide[('case', 'sum', '2_ALARMA')]
    .div(ByWeekProv_wide.sum(axis=1))
    .rename('alarmShare')
    .reset_index()
)
ByWeekProvAlarm
| semana | provincia | alarmShare | |
|---|---|---|---|
| 0 | 1 | ALTO AMAZONAS | 0.285714 |
| 1 | 1 | BAGUA | 1.000000 |
| 2 | 1 | BELLAVISTA | 0.000000 |
| 3 | 1 | CARABAYA | 0.000000 |
| 4 | 1 | CHANCHAMAYO | 0.214286 |
| ... | ... | ... | ... |
| 1867 | 53 | TOCACHE | 0.125000 |
| 1868 | 53 | TUMBES | 0.000000 |
| 1869 | 53 | UCAYALI | 0.140000 |
| 1870 | 53 | UTCUBAMBA | 0.052632 |
| 1871 | 53 | ZARUMILLA | 0.000000 |
1872 rows × 3 columns
# Keep only the week/province rows with a positive ALARMA share.
# The .copy() makes the subset an independent DataFrame, so that adding a
# column to it later is not an assignment on a slice of ByWeekProvAlarm
# (which would raise SettingWithCopyWarning).
ByWeekProvAlarm_focus=ByWeekProvAlarm[ByWeekProvAlarm.alarmShare>0].copy()
ByWeekProvAlarm_focus.describe()
| semana | alarmShare | |
|---|---|---|
| count | 893.000000 | 893.000000 |
| mean | 28.017917 | 0.278162 |
| std | 16.756824 | 0.267728 |
| min | 1.000000 | 0.001248 |
| 25% | 12.000000 | 0.090909 |
| 50% | 29.000000 | 0.190476 |
| 75% | 44.000000 | 0.333333 |
| max | 53.000000 | 1.000000 |
# Ordered share brackets, right-closed: [-1, 0.10], (0.10, 0.25],
# (0.25, 0.5], (0.5, 1].
edges = [-1, .10, .25, .5, 1]
theLabels = ["a.below10%", "b.11-25%", "c.26-50%", "d.above50%"]
ByWeekProvAlarm_focus.loc[:, "alarmLevels"] = pd.cut(
    x=ByWeekProvAlarm_focus['alarmShare'],
    bins=edges,
    labels=theLabels,
    include_lowest=True,
    ordered=True,
)
##
ByWeekProvAlarm_focus.head()
| semana | provincia | alarmShare | alarmLevels | |
|---|---|---|---|---|
| 0 | 1 | ALTO AMAZONAS | 0.285714 | c.26-50% |
| 1 | 1 | BAGUA | 1.000000 | d.above50% |
| 4 | 1 | CHANCHAMAYO | 0.214286 | b.11-25% |
| 5 | 1 | CORONEL PORTILLO | 0.222222 | b.11-25% |
| 7 | 1 | EL DORADO | 0.333333 | c.26-50% |
# Shared base: weeks on x, provinces on y ordered by their maximum
# alarmShare (highest first).
alt_WorstProv = alt.Chart(ByWeekProvAlarm_focus).encode(
    x=alt.X('semana:O'),
    y=alt.Y(
        'provincia:N',
        sort=alt.EncodingSortField(field='alarmShare', op='max', order='descending'),
    ),
)
# Layer 1: cells coloured by alarm level.
enc1_WorstProv = alt_WorstProv.encode(
    color=alt.Color(
        'alarmLevels:O',
        scale=alt.Scale(scheme="lightgreyred", reverse=False),
    ),
)
enc1_WorstProv.mark_rect()
# Layer 2: the share printed with one decimal; labels below 0.3 are made
# fully transparent.
enc2_WorstProv = alt_WorstProv.encode(
    text=alt.Text('alarmShare:Q', format=".1f"),
    opacity=alt.condition(
        'datum.alarmShare >= 0.3',
        alt.value(1),
        alt.value(0),
    ),
)
enc2_WorstProv.mark_text(fontStyle='bold')
# Heatmap with the text labels layered on top.
enc1_WorstProv.mark_rect() + enc2_WorstProv.mark_text()
!pip install nbconvert
!jupyter nbconvert --to HTML "tarea1_20200422(1)".ipynb